package com.zhida.gooutcrawler.spider.processor;

import com.zhida.gooutcrawler.http.RetrofitFactory;
import com.zhida.gooutcrawler.spider.processor.IUrlProcessor;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.ArrayList;
import java.util.List;

/**
 * Collects the links found on a list page.
 * Created by Administrator on 2017-04-05.
 */
public class ListProcessor implements IUrlProcessor {

    /** CSS selector matching the anchors inside the list section of the page. */
    private static final String LINK_SELECTOR = "section.newList_01 ul li a";

    /**
     * Extracts the link targets of every anchor matched by {@link #LINK_SELECTOR}.
     *
     * <p>Hrefs that are not already absolute {@code http(s)} URLs are prefixed with
     * {@code RetrofitFactory.BASE_URL}; absolute ones are returned unchanged so the
     * host is never prepended twice. Anchors whose {@code href} is missing or blank
     * are skipped.
     *
     * @param html the HTML of the list page; may be {@code null}
     * @return the extracted URLs in document order; empty (never {@code null}) when
     *         {@code html} is {@code null} or no anchor matches
     */
    public List<String> parse(String html) {
        if (html == null) {
            // Nothing to parse; return an empty result instead of failing inside jsoup.
            return new ArrayList<String>();
        }

        Document doc = Jsoup.parse(html);
        Elements links = doc.select(LINK_SELECTOR);
        List<String> urls = new ArrayList<String>(links.size());
        for (Element link : links) {
            // attr() yields "" when the attribute is absent.
            String href = link.attr("href").trim();
            if (href.isEmpty()) {
                continue;
            }
            if (!isAbsoluteHttpUrl(href)) {
                // Prepend the server host to site-relative links.
                href = RetrofitFactory.BASE_URL + href;
            }
            urls.add(href);
        }
        return urls;
    }

    /** Returns whether {@code href} already starts with an http or https scheme (case-insensitive). */
    private static boolean isAbsoluteHttpUrl(String href) {
        return href.regionMatches(true, 0, "http://", 0, 7)
                || href.regionMatches(true, 0, "https://", 0, 8);
    }

}
